#Loading Libraries
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import splitfolders
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense,Conv2D,Dropout,Flatten,MaxPooling2D, BatchNormalization,Input,concatenate
from keras.callbacks import EarlyStopping,ReduceLROnPlateau
from keras.utils import plot_model
from sklearn.metrics import classification_report, confusion_matrix
# Data Extraction
# Root folder of the ASL dataset; it holds one sub-folder per class.
base_path = "C:\\Users\\punit\\MA3832 Neural Network and Deep Learning\\asl_dataset\\"
# Map each integer label (0-35) to its class name: the ten digits first,
# followed by the lowercase letters "a" through "z". The class name is also
# the name of the sub-folder that is listed for that class.
categories = dict(enumerate("0123456789abcdefghijklmnopqrstuvwxyz"))
def add_class_name_prefix(df, col_name):
    """Prefix every filename in ``df[col_name]`` with its class folder.

    The class name is the single character that follows the first
    underscore of the filename, so ``"hand1_0_bot_seg_1_cropped.jpeg"``
    becomes ``"0/hand1_0_bot_seg_1_cropped.jpeg"``.

    Args:
        df: DataFrame holding the filenames. The column is overwritten
            in place.
        col_name: Name of the column that contains the filenames.

    Returns:
        The same ``df`` object, with ``col_name`` rewritten.

    Raises:
        ValueError: If a filename contains no underscore, because no class
            name can be derived from it.
    """

    def _with_class_folder(filename):
        # Split once on the first underscore instead of searching twice.
        _, separator, remainder = filename.partition("_")
        if not separator:
            raise ValueError(
                f"cannot derive a class name from {filename!r}: "
                "the filename contains no underscore"
            )
        # remainder[:1] is the character right after the underscore; it is
        # the empty string when the underscore is the last character.
        return remainder[:1] + "/" + filename

    df[col_name] = df[col_name].apply(_with_class_folder)
    return df
# Collect every image filename in the dataset together with its integer label.
# Each class folder holds images of a single class, so the label of a file is
# the key of the folder that is being listed.
filenames_list = []
categories_list = []
print("Base Path:", base_path)
for category, class_name in categories.items():
    filenames = os.listdir(base_path + class_name)
    filenames_list.extend(filenames)
    categories_list.extend([category] * len(filenames))
df = pd.DataFrame({"filename": filenames_list, "category": categories_list})
# Turn bare filenames into "<class folder>/<filename>" relative paths.
df = add_class_name_prefix(df, "filename")
print("DataFrame Sample:")
print(df.head())
# Shuffle the rows and renumber the index from 0.
df = df.sample(frac=1).reset_index(drop=True)
Base Path: C:\Users\punit\MA3832 Neural Network and Deep Learning\asl_dataset\
DataFrame Sample:
filename category
0 0/hand1_0_bot_seg_1_cropped.jpeg 0
1 0/hand1_0_bot_seg_2_cropped.jpeg 0
2 0/hand1_0_bot_seg_3_cropped.jpeg 0
3 0/hand1_0_bot_seg_4_cropped.jpeg 0
4 0/hand1_0_bot_seg_5_cropped.jpeg 0
# Notebook cell: evaluating the bare name displays the shuffled DataFrame.
df
| | filename | category |
|---|---|---|
| 0 | a/hand4_a_bot_seg_2_cropped.jpeg | 10 |
| 1 | g/hand2_g_right_seg_1_cropped.jpeg | 16 |
| 2 | q/hand1_q_right_seg_1_cropped.jpeg | 26 |
| 3 | 5/hand1_5_bot_seg_5_cropped.jpeg | 5 |
| 4 | 2/hand2_2_top_seg_4_cropped.jpeg | 2 |
| ... | ... | ... |
| 2510 | l/hand1_l_left_seg_4_cropped.jpeg | 21 |
| 2511 | u/hand3_u_dif_seg_2_cropped.jpeg | 30 |
| 2512 | 0/hand1_0_top_seg_1_cropped.jpeg | 0 |
| 2513 | y/hand1_y_left_seg_3_cropped.jpeg | 34 |
| 2514 | c/hand2_c_right_seg_1_cropped.jpeg | 12 |
2515 rows × 2 columns
# Report how many image files were collected (one DataFrame row per file).
print("number of elements = ", df.shape[0])
number of elements = 2515
# Data Exploration
# Preview the first images of the shuffled DataFrame in a grid, each one
# titled with its class name.
num_rows = 10
num_columns = 15
# Number of cells available in the grid
total_subplots = num_rows * num_columns
plt.figure(figsize=(40, 40))
# Stop at whichever runs out first: grid cells or DataFrame rows.
for i in range(min(total_subplots, len(df))):
    path = base_path + df.filename[i]
    img = plt.imread(path)
    plt.subplot(num_rows, num_columns, i + 1)
    plt.imshow(img)
    plt.title(categories[df.category[i]], fontsize=20, fontstyle='italic')
    plt.axis("off")
plt.tight_layout()  # Ensure proper spacing between subplots
plt.show()